Устанавливаем нужные параметры:
Читаем данные:
# Load the Hogwarts dataset from CSV and preview its first rows.
# NOTE(review): the path is absolute and machine-specific — confirm whether a
# project-relative path should be used so the report runs on other machines.
hogwarts <- read_csv(
  file = "/Users/ekaterinavostokova/Downloads/dayavis_BI_2024/data/hogwarts.csv"
)
head(hogwarts)
## # A tibble: 6 × 60
## id house course sex blood_status result Transfiguration_exam Potions_exam
## <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 11 Gryf… 4 male Half-blood -250 25 5
## 2 13 Gryf… 5 male Pure-blood -310 49 33
## 3 25 Gryf… 7 fema… Pure-blood -330 54 33
## 4 29 Gryf… 3 male Half-blood -345 70 34
## 5 61 Gryf… 4 male Half-blood -325 77 40
## 6 84 Gryf… 2 fema… Half-blood -195 43 72
## # ℹ 52 more variables: Herbology_exam <dbl>,
## # `Defence against the dark arts_exam` <dbl>, `History of magic_exam` <dbl>,
## # Divinations_exam <dbl>, Astronomy_exam <dbl>, Arythmancy_exam <dbl>,
## # `Care of magical creatures_exam` <dbl>, `Muggle studies_exam` <dbl>,
## # `Study of ancient runs_exam` <dbl>, Flying_exam <dbl>, Charms_exam <dbl>,
## # wand <chr>, `week 1` <dbl>, `week 2` <dbl>, `week 3` <dbl>, `week 4` <dbl>,
## # `week 5` <dbl>, `week 6` <dbl>, `week 7` <dbl>, `week 8` <dbl>, …
## Раздел 1. Столбчатые диаграммы.
# Bar chart: number of rows (students) per course.
ggplot(hogwarts, aes(x = course)) +
  geom_bar(fill = "lightblue", colour = "black") +
  theme_bw() +
  theme_custom
# Bars normalised to 1 (position = "fill"): share of each blood status within
# every house. Houses are ordered by frequency via fct_infreq().
hogwarts |>
  mutate(house = fct_infreq(house)) |>
  ggplot(aes(x = house, fill = blood_status)) +
  geom_bar(position = "fill", colour = "black") +
  scale_x_discrete(name = "house") +
  theme_bw() +
  theme_custom
Вывод: на факультете Слизерин нет muggle-born и очень большой процент pure-blood, для остальных факультетов пропорции примерно одинаковые.
# Keep only the rows whose blood status is not "Half-blood"; the result is
# stored in `hogwarts_filtered`.
hogwarts_filtered <- filter(hogwarts, blood_status != "Half-blood")

# Share of the remaining blood statuses per house, with a dashed reference
# line at 0.5 and Russian axis / legend labels.
# NOTE(review): the names in `labels` must match the blood_status values in the
# data exactly — confirm the "Muggle-borned" spelling against the dataset.
ggplot(hogwarts_filtered) +
  geom_bar(
    aes(x = fct_infreq(house), fill = blood_status),
    position = "fill",
    colour = "black"
  ) +
  geom_hline(yintercept = 0.5, linetype = "dashed", linewidth = 2) +
  scale_x_discrete(name = "Факультет") +
  scale_fill_discrete(
    labels = c(
      "Muggle-borned" = "Маглорожденные",
      "Pure-blood" = "Чистокровные"
    )
  ) +
  ylab("Доля") +
  theme_bw() +
  theme_custom
## Раздел 2. Боксплоты.
### Задание 1. Распределение баллов третьей недели по факультетам.
# Box plots of `week 3` points per house. Houses are ordered with
# fct_reorder(..., .desc = TRUE), i.e. by descending summary of `week 3`
# (forcats uses the median by default).
hogwarts |>
  mutate(house = fct_reorder(house, `week 3`, .desc = TRUE)) |>
  ggplot(aes(x = house, y = `week 3`)) +
  geom_boxplot(colour = "grey49") +
  scale_x_discrete(name = "House") +
  theme_bw() +
  theme_custom
# Notched box plots of `week 3` points per house, split by blood status
# through the fill aesthetic. Houses are ordered by descending `week 3`.
hogwarts |>
  mutate(house = fct_reorder(house, `week 3`, .desc = TRUE)) |>
  ggplot(aes(x = house, y = `week 3`, fill = blood_status)) +
  geom_boxplot(notch = TRUE, colour = "grey49") +
  scale_x_discrete(name = "House") +
  theme_bw() +
  theme_custom
# Notched box plots of `week 3` points per house (filled by blood status) with
# the individual observations overlaid as jittered points.
hogwarts |>
  mutate(house = fct_reorder(house, `week 3`, .desc = TRUE)) |>
  ggplot(aes(x = house, y = `week 3`)) +
  geom_boxplot(
    aes(fill = blood_status),
    colour = "grey49",
    notch = TRUE,
    outlier.shape = NA, # outliers are hidden; the jitter layer shows all points
    linewidth = 1.0,
    width = 0.5
  ) +
  # The jitter layer has no fill mapping, so points are not split by status.
  geom_jitter(width = 0.2, alpha = 0.5) +
  scale_x_discrete(name = "House") +
  ggtitle("Распределение очков недели 3 по факультетам") +
  labs(caption = "по данным Хогвартс") +
  theme_bw() +
  theme_custom +
  # Title and caption sizes are set last so they override the themes above.
  theme(
    plot.title = element_text(size = 25),
    plot.caption = element_text(size = 15)
  )
# Lollipop chart of the final score (`result`) for students of course 5:
# a segment from 0 to the score plus a point coloured by wand core.
#
# Fix: the segment layer ordered ids with fct_reorder(id, result) while the
# point layer used fct_reorder(id, result, .desc = TRUE). Both layers share one
# discrete y scale, so they asked for opposite orders on the same axis.
# The order is now computed once in mutate() and reused by both layers; the
# segment layer's ascending order (largest score at the top) is kept.
hogwarts |>
  filter(course == 5) |>
  mutate(id = fct_reorder(as.factor(id), result)) |>
  ggplot(aes(y = id)) +
  geom_segment(aes(yend = id, x = 0, xend = result)) +
  geom_point(aes(x = result, colour = wand), size = 4) +
  scale_colour_manual(
    values = c(
      "Dragon heartstring" = "red",
      "Phoenix feather" = "yellow",
      "Unicorn hair" = "grey"
    )
  ) +
  scale_y_discrete(name = "Student ID") +
  xlab("Final Score") +
  ggtitle("Lollipop Plot: Final Scores of 5th Year Students") +
  theme_bw() +
  theme_custom
# Histogram of Astronomy exam scores filled by house: Slytherin is green,
# the other three houses are grey. The bin count is
# ceiling(log2(n) + 1), with n the number of rows in `hogwarts`.
ggplot(hogwarts) +
  geom_histogram(
    aes(x = Astronomy_exam, fill = house),
    bins = ceiling(log2(nrow(hogwarts)) + 1),
    colour = "black"
  ) +
  scale_fill_manual(
    values = c(
      "Slytherin" = "green",
      "Gryffindor" = "grey",
      "Hufflepuff" = "grey",
      "Ravenclaw" = "grey"
    )
  ) +
  ylab("Number of students") +
  theme_bw() +
  # Font sizes are set explicitly here; theme_custom is not used in this plot.
  theme(
    axis.title = element_text(size = 22),
    axis.text = element_text(size = 18),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 18)
  )
# Modified custom theme: white backgrounds, black panel border, grey grid
# lines, and enlarged axis / legend fonts.
theme_custom_mod <- theme(
  # Backgrounds and border
  plot.background = element_rect(fill = "white"),
  panel.background = element_rect(fill = "white", colour = "black"),
  panel.border = element_rect(colour = "black", fill = NA),
  # Grid lines
  panel.grid.major = element_line(colour = "grey", linewidth = 0.25),
  panel.grid.minor = element_line(colour = "lightgrey", linewidth = 0.1),
  # Font sizes
  axis.title = element_text(size = 25),
  axis.text = element_text(size = 20),
  legend.title = element_text(size = 25),
  legend.text = element_text(size = 20)
)
Наблюдаем, как работает модифицированная версия:
# Same week-3 box plot + jitter as before, but without notches and styled
# with theme_custom_mod only, to show the modified theme.
hogwarts |>
  mutate(house = fct_reorder(house, `week 3`, .desc = TRUE)) |>
  ggplot(aes(x = house, y = `week 3`)) +
  geom_boxplot(
    aes(fill = blood_status),
    colour = "grey49",
    outlier.shape = NA, # outliers are hidden; the jitter layer shows all points
    linewidth = 1.0,
    width = 0.5
  ) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  scale_x_discrete(name = "House") +
  ggtitle("Распределение очков недели 3 по факультетам") +
  labs(caption = "по данным Хогвартс") +
  theme_custom_mod
На мой взгляд, для визуализации гистограмм предпочтительнее разбиение на строки, когда по оси x отображены какие-либо числовые значения, которые остаются общими для всех графиков. В таком случае проще воспринимать различия между распределениями. В случае violin-plot, наоборот, нагляднее разбивать по столбцам, так как в этих графиках распределение показано по вертикали. В общем случае нужно стараться сохранять одинаковой ту непрерывную ось, которая отражает распределение.
# Histograms of Divinations exam scores, one facet per course laid out in
# two rows. The bin count is ceiling(log2(n) + 1), n = nrow(hogwarts).
ggplot(hogwarts, aes(x = Divinations_exam)) +
  geom_histogram(
    bins = ceiling(log2(nrow(hogwarts)) + 1),
    fill = "turquoise1",
    colour = "grey49"
  ) +
  facet_wrap(vars(course), nrow = 2) +
  scale_x_continuous(name = "Divinations exam score") +
  scale_y_continuous(name = "Number of students") +
  ggtitle("Distributions of Divinations exam scores by course") +
  theme_bw() +
  theme_custom_mod +
  # Smaller fonts than theme_custom_mod; applied last so they take effect.
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 12)
  )
# Overlaid density curves of two exam scores (Defence against the dark arts
# and Herbology), faceted by sex. `fill` is mapped to constant strings inside
# aes() so that scale_fill_manual() can colour them and a legend is drawn.
ggplot(hogwarts) +
  geom_density(
    aes(x = `Defence against the dark arts_exam`, fill = "Dark Arts"),
    colour = "blue",
    alpha = 0.5
  ) +
  geom_density(
    aes(x = Herbology_exam, fill = "Herbology"),
    colour = "green",
    alpha = 0.5
  ) +
  facet_wrap(vars(sex)) +
  scale_fill_manual(values = c("Dark Arts" = "blue", "Herbology" = "green")) +
  scale_x_continuous(name = "Exam Score") +
  scale_y_continuous(name = "Density") +
  ggtitle("Density of Exam Scores for Dark Arts and Herbology by Gender") +
  labs(caption = "Source: Hogwarts Data") +
  theme_custom_mod